# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
from elasticsearch import Elasticsearch
import json

class SinaPipeline:
    """Scrapy item pipeline that indexes every scraped item into Elasticsearch.

    One client is created when the spider opens and released when it closes.
    Each item is stored under its own ``id`` field, so indexing an item whose
    ``id`` was already used targets the same document id.
    """

    HOSTS = '49.234.190.147'
    INDEX = 'sina-post'  # index name (comparable to a MySQL database name)

    # Client handle; stays None until open_spider() runs.
    es = None

    def open_spider(self, spider):
        """Create the Elasticsearch client when the spider starts."""
        self.es = Elasticsearch(self.HOSTS)

    def close_spider(self, spider):
        """Release the client's connections when the spider finishes.

        Does nothing if the spider was never opened, so calling this hook
        after a failed start does not raise.
        """
        client = getattr(self, "es", None)
        if client is None:
            return
        # Older client releases have no Elasticsearch.close(); closing the
        # underlying transport is used as the fallback there.
        closer = getattr(client, "close", None)
        if closer is None:
            closer = client.transport.close
        closer()
        self.es = None

    def process_item(self, item, spider):
        """Index ``item`` into ``INDEX`` and hand it on to the next pipeline.

        Args:
            item: The scraped item; it must expose an ``"id"`` key, which is
                used as the document id.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object, unchanged.

        Raises:
            KeyError: If the item has no ``"id"`` field.
        """
        item_dict = dict(item)
        # Serialize the item to a JSON string and send it as the request body.
        json_str = json.dumps(item_dict)
        self.es.index(
            index=self.INDEX,
            doc_type="sina-post",
            id=item["id"],
            body=json_str,
        )
        return item
